import re
import time
import pymysql
import requests
import selenium
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# --- Database connection ----------------------------------------------------
# NOTE(review): host and credentials are hard-coded in source; they should be
# moved to environment variables or a config file kept out of version control.
conn_obj = pymysql.connect(
    host='183.6.107.160',  # MySQL server IP address
    port=22307,  # MySQL server port
    user='root',  # user name
    password='szl07302',  # password (the original note says `passwd` is an accepted alias)
    database='test_dashboard_dev',  # database name (the original note says `db` is an accepted alias)
    charset='utf8'  # connection character set
)
cursor = conn_obj.cursor()

# --- Topic list -------------------------------------------------------------
# One topic per line. The file is read before the browser is started so that a
# missing/empty file fails fast without leaving a headless Chrome behind.
# Lines are stripped because readlines()-style trailing newlines would
# otherwise end up in both the page URL and the stored `topic` value; blank
# lines are dropped for the same reason.
# NOTE(review): rows stored by earlier runs may carry a trailing "\n" in
# `topic` -- confirm whether existing data needs a one-off cleanup.
with open(r"D:\program_item\python\爬取文博昵称和头像\a.txt", "r") as file:
    list1 = [line.strip() for line in file if line.strip()]
if not list1:
    conn_obj.close()
    raise SystemExit("topic file is empty: nothing to scrape")
topicNum = len(list1)
topicIndex = 0

# --- Headless Chrome --------------------------------------------------------
option = Options()
option.add_argument("--headless")
option.add_argument("--disable-gpu")
service = Service(r"F:\桌面\win_r\浏览器驱动\chrome/chromedriver.exe")
driver = webdriver.Chrome(options=option, service=service)

# Open the first topic and give the page a fixed time to render.
driver.get("https://weibo.com/hot/list/" + list1[topicIndex])
time.sleep(6)

num = 0  # running count of executed insert statements
targets = []  # [nickname, avatar URL] pairs from the previous scan
new_targets = []  # pairs collected by the scan currently in progress

# Initial scan: record what is visible before the first scroll so the main
# loop has a baseline to compare against.
for msg in driver.find_elements(By.CLASS_NAME, "vue-recycle-scroller__item-view"):
    head = msg.find_element(By.CLASS_NAME, "ALink_default_2ibt1")
    targets.append([head.get_attribute("aria-label"), head.find_element(By.TAG_NAME, "img").get_attribute("src")])

driver.execute_script("window.scrollBy(0,1500)")

# Main scrape loop.
#
# For every topic in list1 the page is scrolled 80 times. After each scroll
# the visible scroller items are re-read into new_targets and compared
# slot-by-slot with the previous scan (targets); every slot whose
# [nickname, avatar URL] pair changed is upserted into weibo_data.
#
# The statement text is constant, so it is defined once outside the loops.
sqll = "insert into weibo_data (name,img,topic,`time`) values(%s,%s,%s,now()) on duplicate key update name = values(name) "

try:
    while topicIndex < topicNum:

        for _ in range(80):
            try:
                for msg in driver.find_elements(By.CLASS_NAME, "vue-recycle-scroller__item-view"):
                    head = msg.find_element(By.CLASS_NAME, "ALink_default_2ibt1")
                    new_targets.append(
                        [head.get_attribute("aria-label"), head.find_element(By.TAG_NAME, "img").get_attribute("src")])
            except selenium.common.exceptions.StaleElementReferenceException:
                # An element went stale mid-scan. No inserts are made this
                # round; the partial scan still becomes the next baseline.
                pass
            else:
                # zip() stops at the shorter list, so a scan that returns
                # fewer items than the previous one no longer raises
                # IndexError. Items beyond the previous scan's length are
                # (as before) not compared in this round.
                for old_entry, new_entry in zip(targets, new_targets):
                    if old_entry != new_entry:
                        print(new_entry, list1[topicIndex], len(new_targets))
                        cursor.execute(sqll, (new_entry[0], new_entry[1], list1[topicIndex]))
                        num += 1
                        print(num)
                # Commit right after the batch so the last round of the last
                # topic is persisted too (previously the commit only ran at
                # the start of the *next* round).
                conn_obj.commit()
            targets = new_targets
            new_targets = []
            driver.execute_script("window.scrollBy(0,2000)")

        topicIndex = topicIndex + 1

        # Only navigate when there is a next topic; indexing list1 past its
        # end used to terminate the script with IndexError.
        if topicIndex < topicNum:
            driver.get("https://weibo.com/hot/list/" + list1[topicIndex])
            time.sleep(4)
finally:
    # Release the DB connection and the browser even when scraping aborts.
    try:
        cursor.close()
        conn_obj.close()
    finally:
        driver.quit()
